# d ;

* Load the segment-level sample file (presumably HPMS merged with Highway Statistics, going by the file name) ;
use "../data/hpms_hwy_stats_88_08.dta", clear ;

* Cross-tabulate year against hwy_hpms, the flag later used to keep rows with value 3 ;
tabulate year hwy_hpms ;

* Row count before and after the expenditure setup script, to see whether it changes the number of rows ;
count ;
do "_setup_expenditure.do" ;
count ;

* Sample, base = All ;
* One-row totals for 1992-2008 excluding state code 11, saved to temp_All ;
*------------------------------;
preserve ;

	local exp_vars exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r ;

	* Same rows as dropping year below 1992 or above 2008 (missing years included) and state 11 ;
	keep if inrange(year, 1992, 2008) & state != 11 ;

	* Step 1: one row per state-year with mean expenditure, summed lane miles and the count of nonmissing id_num ;
	collapse (mean) `exp_vars'
	         (sum) lane_miles
	         (count) segments=id_num,
	         by(state year) ;

	* Step 2: one row per year with sums over states, N and N_HS count the state-years with nonmissing lane_miles and resurfacing expenditure ;
	collapse (sum) lane_miles `exp_vars' segments
	         (count) N=lane_miles N_HS=exp_IRI_IH_SF12a_r,
	         by(year) ;

	* Step 3: a single row summing over all years ;
	collapse (sum) lane_miles `exp_vars' segments N N_HS ;

	* Identifiers used later as reshape keys ;
	generate base = "All" ;
	generate sample = "Sample" ;
	save temp_All, replace ;
restore ;


* Sample, base = Merge ;
* Same totals as the All block but restricted to rows with hwy_hpms equal to 3, saved to temp_merge ;
*------------------------------;
preserve ;

	local exp_vars exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r ;

	* Years 1992-2008, state code 11 excluded, and only rows flagged 3 in hwy_hpms ;
	keep if inrange(year, 1992, 2008) & state != 11 ;
	keep if hwy_hpms == 3 ;

	* Step 1: one row per state-year ;
	collapse (mean) `exp_vars'
	         (sum) lane_miles
	         (count) segments=id_num,
	         by(state year) ;

	* Step 2: one row per year, N and N_HS count state-years with nonmissing lane_miles and resurfacing expenditure ;
	collapse (sum) lane_miles `exp_vars' segments
	         (count) N=lane_miles N_HS=exp_IRI_IH_SF12a_r,
	         by(year) ;

	* Step 3: a single row summing over all years ;
	collapse (sum) lane_miles `exp_vars' segments N N_HS ;

	generate base = "Merge" ;
	generate sample = "Sample" ;
	save temp_merge, replace ;
restore ;


count ;

* Table A2: classify each state-year of the merged sample by what is missing ;
* (IRI changes, resurfacing events, resurfacing expenditure) and export the tabulation ;
* Everything happens inside preserve and restore so the data in memory is unchanged afterwards ;
preserve ;
	*calculate differences in miles and IRI;
	*local D_vars "u_iri_IH u_iri_urban u_iri_rural u_lane_miles_IH u_lane_miles_rural u_lane_miles_urban";
	local D_vars iri ;
	* NOTE(review): the sort and the row comparisons use id while the collapse below counts id_num, confirm id is its own variable and not an abbreviation ;
	sort state id year ;
	* For every variable in D_vars create the first difference (prefix D) and the one-year lag (prefix lag_) ;
	* Both are filled only when the previous row has the same id and state, is exactly one year earlier, ;
	* and the current and previous values are both nonmissing and nonzero, otherwise they are missing ;
	foreach varname of local D_vars{;
		*changed from leads to lags, 20190429;
		gen D`varname'=cond(
				id[_n]==id[_n-1]&
				state[_n]==state[_n-1]&
				`varname'[_n]!=.&`varname'[_n-1]!=.&
				`varname'[_n]!=0&`varname'[_n-1]!=0&
				year[_n]==year[_n-1]+1,
				`varname'[_n]-`varname'[_n-1],.);
		gen lag_`varname'=cond(
				id[_n]==id[_n-1]&
				state[_n]==state[_n-1]&
				`varname'[_n]!=.&`varname'[_n-1]!=.&
				`varname'[_n]!=0&`varname'[_n-1]!=0&
				year[_n]==year[_n-1]+1,
				`varname'[_n-1],.);
				};


	* Keep only rows flagged 3 in hwy_hpms ;
	keep if hwy_hpms==3;
	* Lane miles weighted by I2_t (presumably a resurfacing indicator, see the label on sh_I2 below) and its state-year total ;
	gen lane_miles_x=I2_t*lane_miles;
	bysort state year : egen lane_milesI2_st=total(lane_miles_x);
	bysort state year : egen I2_st=total(I2_t);
	* Flags are 1 or missing so that the count statistic below counts flagged rows only ;
	* D_I: the state-year total of weighted lane miles is zero or missing ;
	gen  D_I=cond(lane_milesI2_st==.|lane_milesI2_st==0,1,.);
	* D_exp: resurfacing expenditure is zero or missing ;
	gen  D_exp=cond(exp_IRI_IH_SF12a_r==0  | exp_IRI_IH_SF12a_r==.,1,.)  ;

	* One row per state-year, segments counts nonmissing id_num and the N_ variables count nonmissing flags ;
	* NOTE(review): lane_milesI2_st is already a state-year total, so summing it over rows multiplies it by the row count ;
	* this only affects sh_I, which is not used in the exported table, confirm intent ;
	collapse (sum) lane_milesI2_st lane_miles_x (count) segments=id_num N_iri=Diri N_I2= D_I N_no_exp=D_exp, by(state year);
	* Share of segments without a usable IRI difference, and a flag for state-years where that share is exactly 1 ;
	gen sh_iri=(1-N_iri/segments);
	gen iri_missing=cond(sh_iri==1,1,0);
	label var 	sh_iri "Missing IRI";
	gen sh_I2=(N_I2/segments);
	label var 	sh_I2 "No resurfaced lanes in state year";
	gen sh_N_no_exp=(N_no_exp/segments);

	gen sh_I= lane_miles_x/lane_milesI2_st;


	drop if year<1992 | year>2008;
	drop if state==11;
	* Variable names are written out in full so the block does not depend on variable abbreviation ;
	keep sh_iri sh_I2 sh_N_no_exp sh_I iri_missing state year;

	* Three-digit style code: hundreds = no IRI, tens = no resurfacing, units = no expenditure ;
	* NOTE(review): the code matches the labelled values only when sh_I2 and sh_N_no_exp are exactly 0 or 1, confirm ;
	gen missing=100*iri_missing+10*sh_I2+sh_N_no_exp;

	label define missing 0 "No missings" 1 "No expenditure" 10 "No resurfacing events" 11 "No resurfacing and no expenditure" 100 "No Iri" 110 "No IRI no resurfacing";
	label values missing missing;


	* Tabulate the codes (missing values included) and write the frequencies to the LaTeX table ;
	* The substitute pairs replace the raw codes with their descriptions and strip the default column headers ;
	estpost tab missing, m label;
	esttab . using "${output}/tables/TableA2_Data_Appendix_missing_sample.tex", cells("b") noobs
	 label nogaps  compress replace

	 title("Accounting for state-years in merge of hpms Sample and Highway Statistics")
					substitute(
"0               &" "No missings&"
"1               &" "No expenditure&"
"10              &" "No resurfacing events&"
"11              &"  "No resurfacing and no expenditure&" 
"100             &"  "No Iri&"
"110             &" "No IRI no resurfacing&"
                "&\multicolumn{1}{c}{(1)}\\" ""
                "&\multicolumn{1}{c}{missing}\\" ""
                "&        b\\" " &        N\\"
 )


;


restore;	

* Sample, base = Est_data ;
* Run the sample cleaning script, then compute the same one-row totals on rows with hwy_hpms equal to 3 ;
*------------------------------;
do "_cleaning_and_new_variables_sample.do" ;
tabulate year ;

preserve ;

	local exp_vars exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r ;

	keep if hwy_hpms == 3 ;

	* Step 1: one row per state-year ;
	collapse (mean) `exp_vars'
	         (sum) lane_miles
	         (count) segments=id_num,
	         by(state year) ;

	* Step 2: one row per year, N and N_HS count state-years with nonmissing lane_miles and resurfacing expenditure ;
	collapse (sum) lane_miles `exp_vars' segments
	         (count) N=lane_miles N_HS=exp_IRI_IH_SF12a_r,
	         by(year) ;

	* Step 3: a single row summing over all years ;
	collapse (sum) lane_miles `exp_vars' segments N N_HS ;

	generate base = "Est_data" ;
	generate sample = "Sample" ;
	save temp_reg, replace ;
restore ;


* Check which years remain in the data after the cleaning step ;
*------------------------------;
tabulate year ;

* Empty the data in memory before the universe file is loaded ;
clear ;


*----------------------------------------------------------------------------------;
*   UNIVERSE ;
*----------------------------------------------------------------------------------;

* Load the state-year file and keep years 1984-2008, excluding state code 11 ;
* (rows with a missing year are removed as well, as in a drop on year above 2008) ;
use "../data/state_year_all_80_15.dta", clear ;
keep if state != 11 & inrange(year, 1984, 2008) ;

*---------------------------------------------------;
* - Generate variables and general cleanings ;
*---------------------------------------------------;
do "_setup_expenditure.do" ;

* Universe, base = All ;
* One-row totals over all state-years currently in memory, saved to temp_All_universe ;
*------------------------------;
preserve ;

	local exp_vars exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r ;

	* Step 1: yearly sums, N and N_HS count rows with nonmissing lane miles and construction expenditure ;
	collapse (sum) u_lane_miles_IH `exp_vars'
	         (count) N=u_lane_miles_IH N_HS=exp_L_IH_SF12a_r,
	         by(year) ;

	* Step 2: a single row summing over all years ;
	collapse (sum) u_lane_miles_IH `exp_vars' N N_HS ;

	* Use the same lane-mile name as the sample files so the files can be appended ;
	rename u_lane_miles_IH lane_miles ;
	generate base = "All" ;
	generate sample = "Universe" ;
	save temp_All_universe, replace ;
restore ;


* Universe, base = Merge ;
* Same totals restricted to rows with hwy_hpms_universe equal to 3, saved to temp_merge_universe ;
*------------------------------;
preserve ;

	local exp_vars exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r ;

	keep if hwy_hpms_universe == 3 ;

	* Step 1: yearly sums, N and N_HS count rows with nonmissing lane miles and construction expenditure ;
	collapse (sum) u_lane_miles_IH `exp_vars'
	         (count) N=u_lane_miles_IH N_HS=exp_L_IH_SF12a_r,
	         by(year) ;

	* Step 2: a single row summing over all years ;
	collapse (sum) u_lane_miles_IH `exp_vars' N N_HS ;

	* Use the same lane-mile name as the sample files so the files can be appended ;
	rename u_lane_miles_IH lane_miles ;
	generate base = "Merge" ;
	generate sample = "Universe" ;
	save temp_merge_universe, replace ;
restore ;


* Universe, base = Est_data ;
* Run the cleaning script, drop state-years without positive construction expenditure, ;
* then compute the one-row totals on rows with hwy_hpms_universe equal to 3 ;
*------------------------------;
do "_cleaning_and_new_variables.do";

tab year periods ;

* Flag rows whose construction expenditure is missing, zero or negative ;
* These are the same three conditions used by the drops right below ;
* (the zero case was previously written as a second test for missing) ;
gen no_exp=cond(exp_L_IH_SF12a_r==. | exp_L_IH_SF12a_r==0 | exp_L_IH_SF12a_r<0,1,0);

drop if exp_L_IH_SF12a_r==.;
drop if exp_L_IH_SF12a_r==0;
drop if exp_L_IH_SF12a_r<0;

* Rows and years left after the drops ;
count ;
tab year ;

preserve ;
	keep if  hwy_hpms_universe==3;
	* Step 1: yearly sums, N and N_HS count rows with nonmissing lane miles and construction expenditure ;
	collapse (sum)u_lane_miles_IH exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r  (count) N=u_lane_miles_IH N_HS=exp_L_IH_SF12a_r , by( year) ;
	* Step 2: a single row summing over all years ;
	collapse (sum)u_lane_miles_IH exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r  N N_HS;
	gen base="Est_data";
	gen sample="Universe";
	* Use the same lane-mile name as the sample files so the files can be appended ;
	ren u_lane_miles_IH lane_miles;
	save temp_reg_universe, replace;	
restore;	


clear ;

* Stack the six one-row summary files, one row per combination of sample and base ;
append using temp_reg_universe temp_merge_universe temp_All_universe
             temp_reg temp_merge temp_All ;

rename segments Segments ;

* Total expenditure across the three categories (rowtotal treats missing values as zero) ;
egen Total = rowtotal(exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r) ;

* Wide step: one row per sample, every statistic gets one column per base (All, Merge, Est_data) ;
reshape wide lane_miles Total exp_L_IH_SF12a_r exp_IRI_IH_SF12a_r exp_IH_mtn_SF12a_r Segments N N_HS,
	i(sample) j(base) string ;
# d ;
* Move the base name from suffix to prefix so that it can serve as the stub in the long step ;
rename *Est_data Est_data* ;
rename *Merge Merge* ;
rename *All All* ;


* Long step: one row per sample and statistic (var) with columns All, Merge and Est_data ;
reshape long All Merge Est_data, i(sample) j(var) string ;

* The universe files carry no segment counts, so that row is removed ;
drop if sample == "Universe" & var == "Segments" ;

* Export Table ;
**EXTRA LINES: \hline\hline \bigskip\\[6ex]) ; 
*settings for latex tables ;
* The globals below hold pieces of esttab options: the prehead pieces open the prehead option ;
* and the prehead_end pieces close it, postfoot is a complete postfoot option ;
		
		
		global prehead_start "prehead(\begin{table}[!htb]\centering 
		\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}  
		\caption{@title}
		\setlength{\tabcolsep}{1pt}";
		
		global prehead_start_longcol "prehead(\begin{table}[!htb]\centering 
		\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}  
		\caption{@title}
		\setlength{\tabcolsep}{7pt}";
		local font_size "";
		
		global prehead_end         "\begin{tabular}{l*{3}{c}}\hline\hline)";
		
		global prehead_end_bigskip "\begin{tabular}{l*{3}{c}}\hline\hline \bigskip\\[6ex])";
		
		global postfoot "postfoot(\hline\hline
		\multicolumn{@span}{}{ \footnotesize @note. @starlegend. }
		\end{tabular}\end{table})";
		
		
		
		
		
* Assemble the option text passed to esttab when the appendix table is exported ;
* The text is copied into the macro directly rather than evaluated as a string expression, ;
* so it cannot be cut off by the string-expression length limit of older Stata releases ;
			local tex_settigns `"
		${prehead_start}
		`font_size'
		${prehead_end}
		${postfoot}
		"';	



format  All Merge Est_data %15.0fc;

* Split each sample into two panels: expenditure rows (the three categories, Total and N_HS) ;
* go to the Highway Statistics panel and the remaining rows stay in the HPMS panel ;
* The letter prefix A to D only fixes the panel order in the sort below ;
replace sample = "B_Highway_Statistics_1992-2008" if sample=="Sample"
											  & ( var=="exp_IH_mtn_SF12a_r"
											   | var=="exp_IRI_IH_SF12a_r"
											   | var=="exp_L_IH_SF12a_r"
											   | var=="Total"
											   | var=="N_HS") ;
											   
											   								   
replace sample = "A_HMPS_Sample_1992-2008" if sample=="Sample";

replace sample = "D_Highway_Statistics_1984-2008" if sample=="Universe"
											  & ( var=="exp_IH_mtn_SF12a_r"
											   | var=="exp_IRI_IH_SF12a_r"
											   | var=="exp_L_IH_SF12a_r"
											   | var=="Total"
											   | var=="N_HS"
											   ) ;
replace sample = "C_HPSM_Universe_1984-2008" if sample=="Universe";			

sort sample var ;

* Matrix with one row per statistic, row equations given by the panel name ;
mkmat  All Merge Est_data , matrix(Appendix_table) rownames(var)  roweq(sample) ;


* Write the matrix as a LaTeX table, substitute swaps variable names for readable row labels ;
* and turns the panel key rows into panel titles ;
* The printed panel titles now read HPMS and carry the full year range 1984-2008, ;
* the internal panel keys are left untouched so the sort order and the matching are unchanged ;
	esttab matrix(Appendix_table,fmt(%15.0fc) )
					using "${output}/tables/TableA1_Data_Appendix.tex",  replace
					title("Dataset Composition\label{tab:appendix_table}")
					noomitted   nobaselevels nomtitles
						substitute("lane_miles" "Lane Miles"
							       "exp_L_IH_SF12a_r" "Construction"
								   "exp_IRI_IH_SF12a_r" "Resurfacing"
								   "exp_IH_mtn_SF12a_r" "Maintenance"
								   "Est_data" "Estimating sample"
								   "Total" "Total Expenditure"
								   "N_HS" "N"
 "A_HMPS_Sample_1992-2008&         &         &         \\" "HPMS Sample 1992-2008\\\hline"
"B_Highway_Statistics_1992-2008&         &         &         \\"  "Highway Statistics 1992-2008 \\\hline"
"D_Highway_Statistics_1984-2008&         &         &         \\"  "Highway Statistics 1984-2008\\\hline"
"C_HPSM_Universe_1984-2008&         &         &         \\" "HPMS Universe 1984-2008\\ \hline"
								
										)
					 label nogaps  compress
					 
					`tex_settigns';	

					#d ;
* Delete the six intermediate summary files written to the working directory ;
local temp_files temp_reg temp_merge temp_All
                 temp_reg_universe temp_merge_universe temp_All_universe ;
foreach stub of local temp_files {;
    erase `stub'.dta ;
};



